# NOTE: the workspace is deliberately NOT cleared here. `rm(list = ls())`
# deletes every object in the caller's global environment but does not detach
# packages or reset options, so it does not produce a clean session. It would
# also delete `stopwords_manual`, which the dfm_remove() calls further down
# expect to exist. Run this script in a fresh R session instead.


library(haven)
library(cregg)

library(ggplot2)
library(dplyr)
library("devtools")
library(readxl)
library(fuzzyjoin)
require(quanteda)
require(quanteda.textstats)
library(haven)
library(data.table)

library("quanteda")
library("quanteda.textplots")


# Read the raw Excel workbook into `wave1_wide`. The path is relative, so the
# working directory must be the folder that contains "Dropbox/".
raw_data_path <- "Dropbox/Independent candidates Chile/01_data/raw_data/tex_data.xlsx"
wave1_wide <- read_excel(path = raw_data_path)

# Independents

# Frequency of each distinct value in column A26A (missing values are not
# counted by table() by default).
table(wave1_wide$A26A)


# Build the document-feature matrix for the A26A text column.
#
# The objects below share their names with the quanteda functions that create
# them (`corpus`, `tokens`, `dfm`). The calls still work because R skips
# non-function objects when it looks up the function in a call; the names are
# kept because later parts of the script refer to them.
corpus <- corpus(wave1_wide, text_field = "A26A")
docid <- as.character(corpus$numericalId)

# `tokens()` has no `tolower` or `stem` arguments, so passing them had no
# effect on the result. Lower-casing is done by tokens_tolower() below.
# NOTE(review): stemming was requested in the original call but has never
# actually been applied. If it is wanted, add
# tokens_wordstem(tokens, language = "spanish") after the stopword removal --
# be aware this changes the features and therefore every plot and model.
tokens <- tokens(corpus, remove_punct = TRUE, remove_numbers = TRUE)
tokens <- tokens_select(tokens, pattern = stopwords("spanish"), selection = "remove")
tokens <- tokens_tolower(tokens)

# A single dfm() call is enough (the original ran the identical call twice).
dfm <- dfm(tokens, tolower = TRUE)
# NOTE(review): `stopwords_manual` is not defined anywhere in the visible
# script; it must exist in the session before this line runs.
dfm <- dfm_remove(dfm, stopwords_manual)
# Drop documents that have no tokens left after cleaning.
dfm <- dfm_subset(dfm, ntoken(dfm) > 0)

# Attach the modelling packages. `library()` stops with an error as soon as a
# package is missing, whereas `require()` only returned FALSE and let the
# script fail later with a less obvious "could not find function" error.
libraries <- c("topicmodels", "dplyr", "stm", "quanteda", "lda", "nnet")
invisible(lapply(libraries, library, character.only = TRUE))

# Word cloud of the features in the current `dfm` (built from column A26A).
# The RNG seed is fixed first -- presumably so the cloud layout is
# reproducible between runs.
set.seed(seed = 100)
textplot_wordcloud(x = dfm)

# Topic modeling

# Fit an LDA topic model with Gibbs sampling on the current `dfm`.
# NOTE(review): the object is called `model_8` but is fitted with k = 5
# topics; the name is kept because other code may refer to it.
model_8 <- LDA(dfm, k = 5, method = "Gibbs", control = list(seed = 10012, iter = 3000))

# Ten highest-ranked terms for each topic.
words_model8 <- get_terms(model_8, 10)
words_model8

# Most likely topic for each document, then the number of documents per topic.
topics <- get_topics(model_8)
# head() rather than [1:10]: with k = 5 the table has at most five entries,
# and indexing past its end padded the output with NA.
head(sort(table(topics), decreasing = TRUE), 10)

# Per-document topic proportions taken from the model's `gamma` slot.
topics_prev <- data.frame(model_8@gamma)
topics_prev

# Do not distinguish topics, just do a word cloud

# Party members

# Build the document-feature matrix for the A26B text column. This overwrites
# the `corpus`, `docid`, `tokens` and `dfm` objects created earlier in the
# script for column A26A.
#
# The objects share their names with the quanteda functions that create them;
# the calls still work because R skips non-function objects when it looks up
# the function in a call.
corpus <- corpus(wave1_wide, text_field = "A26B")
docid <- as.character(corpus$numericalId)

# `tokens()` has no `tolower` or `stem` arguments, so passing them had no
# effect on the result. Lower-casing is done by tokens_tolower() below.
# NOTE(review): stemming was requested in the original call but has never
# actually been applied. If it is wanted, add
# tokens_wordstem(tokens, language = "spanish") after the stopword removal --
# be aware this changes the features and therefore the plot.
tokens <- tokens(corpus, remove_punct = TRUE, remove_numbers = TRUE)
tokens <- tokens_select(tokens, pattern = stopwords("spanish"), selection = "remove")
tokens <- tokens_tolower(tokens)

# A single dfm() call is enough (the original ran the identical call twice).
dfm <- dfm(tokens, tolower = TRUE)
# NOTE(review): `stopwords_manual` is not defined anywhere in the visible
# script; it must exist in the session before this line runs.
dfm <- dfm_remove(dfm, stopwords_manual)
# Drop documents that have no tokens left after cleaning.
dfm <- dfm_subset(dfm, ntoken(dfm) > 0)

# Attach the modelling packages (a no-op for packages that are already
# attached). `library()` stops with an error if a package is missing, whereas
# `require()` only returned FALSE and let the script carry on.
libraries <- c("topicmodels", "dplyr", "stm", "quanteda", "lda", "nnet")
invisible(lapply(libraries, library, character.only = TRUE))

# Word cloud of the features in the current `dfm` (built from column A26B).
# The RNG seed is fixed first -- presumably so the cloud layout is
# reproducible between runs.
set.seed(seed = 7)
textplot_wordcloud(x = dfm)

